knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(ggplot2)
library(readxl)
library(stringr)
library(lubridate)
library(plotly)
# Load the raw records; text columns stay character instead of becoming factors.
dataset <- read.csv("dataset.csv", stringsAsFactors = FALSE)
# Inspect the column types and the first few values of each column.
str(dataset)
#> 'data.frame': 245 obs. of 4 variables:
#> $ X : int 1 2 3 4 5 6 7 8 9 10 ...
#> $ date: chr "2018-02-07" "2018-02-07" "2018-02-07" "2018-02-08" ...
#> $ acct: chr "naver.com" "gmail.com" "gmail.com" "nate.com" ...
#> $ path: chr "페이스북" "블로그, 페이스북" "홈페이지" "페이스북" ...
# List the distinct account domains; the output below contains misspellings
# (e.g. "naver.con", "nvaer.com", "hanmauk.net") and an NA entry.
unique(dataset$acct)
#> [1] "naver.com" "gmail.com" "nate.com"
#> [4] "allbr.co.kr" "yahoo.com" "hanmail.net"
#> [7] "daum.net" "naver.con" "afotrade.com"
#> [10] NA "nvaer.com" "hotmail.com"
#> [13] "nomadconnection.com" "hanmauk.net" "ajou.ac.kr"
#> [16] "legalinsight.kr"
# Fold misspelled domains into their canonical spelling. `%in%` never yields NA,
# so rows with a missing `acct` are left untouched.
dataset$acct[dataset$acct %in% c("naver.con", "nvaer.com")] <- "naver.com"
# "hanmail.net" and its misspelling are recoded to "daum.net".
dataset$acct[dataset$acct %in% c("hanmail.net", "hanmauk.net")] <- "daum.net"
dataset$date <- as.Date(dataset$date)
dataset$acct <- as.factor(dataset$acct)

# One logical flag per channel: TRUE when `path` contains that channel's name.
# A single `path` value can mention several channels, so flags are not exclusive.
dataset$fb <- str_detect(dataset$path, "페이스북")
dataset$blog <- str_detect(dataset$path, "블로그")
dataset$web <- str_detect(dataset$path, "홈페이지")
dataset$jiin <- str_detect(dataset$path, "지인")
dataset$search <- str_detect(dataset$path, "검색")
dataset$cafe <- str_detect(dataset$path, "카페")
dataset$insta <- str_detect(dataset$path, "인스타")
# count by acct
# Number of records per account domain, most frequent first.
dataset %>%
  group_by(acct) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
#> # A tibble: 12 x 2
#> acct count
#> <fct> <int>
#> 1 naver.com 120
#> 2 gmail.com 84
#> 3 daum.net 26
#> 4 nate.com 4
#> 5 allbr.co.kr 2
#> 6 yahoo.com 2
#> 7 <NA> 2
#> 8 afotrade.com 1
#> 9 ajou.ac.kr 1
#> 10 hotmail.com 1
#> 11 legalinsight.kr 1
#> 12 nomadconnection.com 1
# mutate acctType
# Collapse the account domains into four buckets: naver, gmail, daum, other.
# Rows whose `acct` is NA keep an NA `acctType` (as the nested ifelse() did)
# instead of being counted as "other".
dataset <- dataset %>%
  mutate(acctType = case_when(
    is.na(acct) ~ NA_character_,
    acct == "naver.com" ~ "naver",
    acct == "gmail.com" ~ "gmail",
    acct %in% c("hanmail.net", "daum.net") ~ "daum",
    TRUE ~ "other"))
dataset$acctType <- as.factor(dataset$acctType)

# Channel mention counts per account type (columns 2:8 are the seven channels).
dataset_acctType <- dataset %>%
  group_by(acctType) %>%
  summarise(
    fb = sum(fb), blog = sum(blog), web = sum(web),
    jiin = sum(jiin), search = sum(search),
    cafe = sum(cafe), insta = sum(insta)) %>%
  # Drop the NA-account group explicitly rather than relying on it being the
  # fifth row of the summary.
  filter(!is.na(acctType))

# Channel mention counts per calendar month. Columns: Y, M, the seven channels
# (positions 3:9), then Date = first day of the month.
dataset_month <- dataset %>%
  group_by(Y = year(date), M = month(date)) %>%
  summarise(
    fb = sum(fb), blog = sum(blog), web = sum(web),
    jiin = sum(jiin), search = sum(search),
    cafe = sum(cafe), insta = sum(insta)) %>%
  # summarise() only peels off the last grouping level; remove the leftover
  # grouping by Y so later steps work on a plain table.
  ungroup() %>%
  mutate(Date = as.Date(paste(Y, M, 1, sep = "-")))
# multiple geom_line
# Monthly mention counts, one line per channel. Mapping `colour` to a constant
# string inside aes() is what produces a legend entry for each line.
fig1 <- ggplot(dataset_month, aes(x = Date)) +
  geom_line(aes(y = fb, colour = "fb")) +
  geom_line(aes(y = blog, colour = "blog")) +
  geom_line(aes(y = web, colour = "web")) +
  geom_line(aes(y = jiin, colour = "jiin")) +
  # `search` is summarised in dataset_month like the other channels but had no
  # line of its own; added so the chart covers all seven channels.
  geom_line(aes(y = search, colour = "search")) +
  geom_line(aes(y = cafe, colour = "cafe")) +
  geom_line(aes(y = insta, colour = "insta")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  # One tick per month present in the data.
  scale_x_date(breaks = dataset_month$Date) +
  labs(x = "Month", y = NULL)
# Render the ggplot as an interactive plotly widget.
ggplotly(fig1)
# All Time
# All-time total per channel: sum each channel column (positions 3:9 of
# dataset_month) over every month. The row names of `df` are the channel names
# carried over from colSums().
df <- data.frame(
  channel = names(dataset_month)[3:9],
  freq = colSums(dataset_month[, 3:9]),
  stringsAsFactors = FALSE
)
# M91.page 39 - pie-chart
# A single stacked bar (x is one constant category) drawn in polar coordinates,
# so each channel's all-time total becomes a pie slice.
fig2 <- ggplot(df, aes(x = "", y = freq, fill = factor(channel))) +
  geom_bar(width = 1, stat = "identity") +
  theme(axis.line = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  labs(fill = "channel", x = NULL, y = NULL) +
  coord_polar(theta = "y", start = 0)
print(fig2)
# facet by month
# googled "r facet piechart"
# https://stackoverflow.com/questions/25372055/how-can-i-use-facet-wrap-using-pie-charts
library(reshape2)
# Reshape the monthly counts to long form: `Date` is the id column and the
# seven channel columns (positions 3:9) are stacked by melt().
dataset_month_long <- melt(
  data.frame(Date = dataset_month$Date, dataset_month[, 3:9]),
  id.vars = "Date"
) # Change it to "long" type - not tidy!
# One pie per month showing each channel's share of that month's mentions.
# position_fill() rescales every facet's stack to [0, 1], so slice labels are
# placed with geom_text() at the middle of each slice. The previous axis breaks
# were computed from raw cumulative counts, which fall outside [0, 1] and were
# therefore dropped.
fig4 <-
  ggplot(dataset_month_long,
         aes(x = 1, y = value, fill = variable)) +
  geom_bar(stat = "identity",
           color = "black",
           position = position_fill()) +
  # Label only non-empty slices to avoid stacked labels on zero-width slices.
  geom_text(aes(label = ifelse(value > 0, as.character(variable), "")),
            position = position_fill(vjust = 0.5),
            size = 3) +
  coord_polar(theta = "y") +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~ Date)
print(fig4)

# Per account type: each channel's share of that type's channel mentions
# (rounded to 2 decimals) plus `subtotal`, the total number of mentions.
# A record can mention several channels, so `subtotal` can exceed the number
# of records of that account type.
dataset_acctType_proportion <-
  data.frame(
    acctType = dataset_acctType$acctType,
    round(dataset_acctType[, 2:8] / rowSums(dataset_acctType[, 2:8]), 2),
    subtotal = rowSums(dataset_acctType[, 2:8]))
dataset_acctType_proportion <- dataset_acctType_proportion %>%
  arrange(desc(subtotal))
dataset_acctType_proportion
#> acctType fb blog web jiin search cafe insta subtotal
#> 1 naver 0.46 0.16 0.05 0.04 0.16 0.03 0.10 135
#> 2 gmail 0.44 0.09 0.10 0.05 0.15 0.09 0.10 94
#> 3 daum 0.38 0.14 0.00 0.00 0.24 0.10 0.14 29
#> 4 other 0.79 0.00 0.07 0.14 0.00 0.00 0.00 14
# Record count per account domain. table() omits NA by default, so the rows
# with a missing `acct` do not appear here.
table(dataset$acct)
#> afotrade.com ajou.ac.kr allbr.co.kr
#> 1 1 2
#> daum.net gmail.com hotmail.com
#> 26 84 1
#> legalinsight.kr nate.com naver.com
#> 1 4 120
#> nomadconnection.com yahoo.com
#> 1 2
# Reshape the per-account-type counts to long form: `acctType` is the id column
# and the seven channel columns (positions 2:8) are stacked by melt().
dataset_acctType_long <- melt(
  data.frame(acctType = dataset_acctType$acctType, dataset_acctType[, 2:8]),
  id.vars = "acctType"
) # Change it to "long" type - not tidy!
# One pie per account type showing each channel's share of that type's
# mentions. position_fill() rescales every facet's stack to [0, 1], so slice
# labels are placed with geom_text() at the middle of each slice. The previous
# axis breaks were computed from raw cumulative counts, which fall outside
# [0, 1] and were therefore dropped.
fig5 <-
  ggplot(dataset_acctType_long,
         aes(x = 1, y = value, fill = variable)) +
  geom_bar(stat = "identity",
           color = "black",
           position = position_fill()) +
  # Label only non-empty slices to avoid stacked labels on zero-width slices.
  geom_text(aes(label = ifelse(value > 0, as.character(variable), "")),
            position = position_fill(vjust = 0.5),
            size = 3) +
  coord_polar(theta = "y") +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~ acctType)
print(fig5)